Compressing Detections into Detection Events for Faster Analysis

Sometimes detection files are difficult to run through complicated algorithms because of their sheer size. A method of summarizing the detection data is to compress these raw detections into events, a single row record for each animal presence at a station over a time period.

library(tidyverse)
library(glatos)
detections_path <- file.path('../data', 'detections.csv')
detections <- glatos::read_glatos_detections(detections_path)

# Filter our detections first
detections <- glatos::false_detections(detections, tf = 3600)
## The filter identified 93 (1.3%) of 7180 detections as potentially false.
filtered_detections <- detections %>% filter(passed_filter != FALSE)

# And create a new detection_events data set from the filtered detections.
detection_events <- glatos::detection_events(filtered_detections, location_col = 'station')
## The event filter distilled 7087 detections down to 3537 distinct detection events.
head(detection_events)  
##   event animal_id location mean_latitude mean_longitude
## 1     1       153  TTB-002      43.39165      -83.99264
## 2     2       153  TTB-001      43.38709      -83.98737
## 3     3       153  TTB-002      43.39165      -83.99264
## 4     4       153  TTB-001      43.38709      -83.98737
## 5     5       153  TTB-002      43.39165      -83.99264
## 6     6       153  TTB-001      43.38709      -83.98737
##       first_detection      last_detection num_detections res_time_sec
## 1 2012-04-29 01:48:37 2012-04-29 02:05:33              8         1016
## 2 2012-04-29 02:08:00 2012-04-29 02:08:00              1            0
## 3 2012-04-29 02:08:00 2012-04-29 02:08:00              1            0
## 4 2012-04-29 02:09:50 2012-04-29 02:09:50              1            0
## 5 2012-04-29 02:09:50 2012-04-29 02:09:50              1            0
## 6 2012-04-29 02:12:24 2012-04-29 02:12:24              1            0
# Let's make our detection intervals, 
# the time period between the first and last detection at a station, 
# into an object called a date-interval. This lets you perform time-based math and some other handy logic
library(lubridate, verbose = FALSE, warn.conflicts = FALSE)

detection_events <- 
    detection_events %>% 
    mutate(detection_interval = lubridate::interval(first_detection, last_detection))

head(detection_events)
##   event animal_id location mean_latitude mean_longitude
## 1     1       153  TTB-002      43.39165      -83.99264
## 2     2       153  TTB-001      43.38709      -83.98737
## 3     3       153  TTB-002      43.39165      -83.99264
## 4     4       153  TTB-001      43.38709      -83.98737
## 5     5       153  TTB-002      43.39165      -83.99264
## 6     6       153  TTB-001      43.38709      -83.98737
##       first_detection      last_detection num_detections res_time_sec
## 1 2012-04-29 01:48:37 2012-04-29 02:05:33              8         1016
## 2 2012-04-29 02:08:00 2012-04-29 02:08:00              1            0
## 3 2012-04-29 02:08:00 2012-04-29 02:08:00              1            0
## 4 2012-04-29 02:09:50 2012-04-29 02:09:50              1            0
## 5 2012-04-29 02:09:50 2012-04-29 02:09:50              1            0
## 6 2012-04-29 02:12:24 2012-04-29 02:12:24              1            0
##                                 detection_interval
## 1 2012-04-29 01:48:37 UTC--2012-04-29 02:05:33 UTC
## 2 2012-04-29 02:08:00 UTC--2012-04-29 02:08:00 UTC
## 3 2012-04-29 02:08:00 UTC--2012-04-29 02:08:00 UTC
## 4 2012-04-29 02:09:50 UTC--2012-04-29 02:09:50 UTC
## 5 2012-04-29 02:09:50 UTC--2012-04-29 02:09:50 UTC
## 6 2012-04-29 02:12:24 UTC--2012-04-29 02:12:24 UTC
# Let's find overlapping events, that is, times that two animals were co-located at a station.
# We'll add the overlapping records for any row to a new column for that row, called overlaps_with

for(event in detection_events$event) {
    detection_events$overlaps_with[event] = paste( # We use paste to create a string of other events
        which(detection_events$location == detection_events$location[event] &  # Make sure that the location is the same
            detection_events$event != event &  # Make sure the event is not the same
            lubridate::int_overlaps(detection_events$detection_interval[event], detection_events$detection_interval) 
            # We can use lubridate's int_overlaps function to find the overlapping events
        ),
        collapse=",")
}

head(detection_events) 
##   event animal_id location mean_latitude mean_longitude
## 1     1       153  TTB-002      43.39165      -83.99264
## 2     2       153  TTB-001      43.38709      -83.98737
## 3     3       153  TTB-002      43.39165      -83.99264
## 4     4       153  TTB-001      43.38709      -83.98737
## 5     5       153  TTB-002      43.39165      -83.99264
## 6     6       153  TTB-001      43.38709      -83.98737
##       first_detection      last_detection num_detections res_time_sec
## 1 2012-04-29 01:48:37 2012-04-29 02:05:33              8         1016
## 2 2012-04-29 02:08:00 2012-04-29 02:08:00              1            0
## 3 2012-04-29 02:08:00 2012-04-29 02:08:00              1            0
## 4 2012-04-29 02:09:50 2012-04-29 02:09:50              1            0
## 5 2012-04-29 02:09:50 2012-04-29 02:09:50              1            0
## 6 2012-04-29 02:12:24 2012-04-29 02:12:24              1            0
##                                 detection_interval overlaps_with
## 1 2012-04-29 01:48:37 UTC--2012-04-29 02:05:33 UTC              
## 2 2012-04-29 02:08:00 UTC--2012-04-29 02:08:00 UTC              
## 3 2012-04-29 02:08:00 UTC--2012-04-29 02:08:00 UTC              
## 4 2012-04-29 02:09:50 UTC--2012-04-29 02:09:50 UTC              
## 5 2012-04-29 02:09:50 UTC--2012-04-29 02:09:50 UTC              
## 6 2012-04-29 02:12:24 UTC--2012-04-29 02:12:24 UTC
# Now that we've got our overlapping detection events, let's see which ones overlap with others

detection_events %>% 
    select(-one_of("detection_interval")) %>% 
    filter(detection_events$overlaps_with != '') %>%
    head()
##   event animal_id location mean_latitude mean_longitude
## 1  1513        22  MAU-003      41.57612      -83.61071
## 2  1530        22  MAU-011      41.63532      -83.53083
## 3  1532        22  MAU-011      41.63532      -83.53083
## 4  1534        22  MAU-011      41.63532      -83.53083
## 5  1536        22  MAU-011      41.63532      -83.53083
## 6  1538        22  MAU-011      41.63532      -83.53083
##       first_detection      last_detection num_detections res_time_sec
## 1 2012-03-27 17:42:08 2012-03-27 21:02:42             13        12034
## 2 2012-04-04 21:20:18 2012-04-04 21:20:18              1            0
## 3 2012-04-04 21:25:20 2012-04-04 21:25:20              1            0
## 4 2012-04-04 21:27:53 2012-04-04 21:27:53              1            0
## 5 2012-04-04 21:29:54 2012-04-04 21:29:54              1            0
## 6 2012-04-04 21:32:26 2012-04-04 21:32:26              1            0
##                                                                                                                                 overlaps_with
## 1 3264,3268,3272,3276,3280,3282,3284,3287,3290,3293,3296,3300,3302,3305,3308,3311,3313,3315,3317,3319,3323,3325,3327,3331,3334,3337,3340,3342
## 2                                                                                                                                        3400
## 3                                                                                                                                        3400
## 4                                                                                                                                        3400
## 5                                                                                                                                        3400
## 6                                                                                                                                        3400
# Our detection events dataframe is also a useful intermediary dataset for creating summaries of animal 
# presence per station. This also shows you how well you can read a dplyr pipeline to see what you're doing
# to the data, provided you name things in readable ways.

summary_data <- 
    detection_events %>% 
    group_by(location) %>%                              # Here we group our detection events by location, 
    summarise(detection_count = sum(num_detections),    # do a total tally of the raw detections
              num_unique_tags = n_distinct(animal_id),  # count the number of unique animals at each location, 
              total_residence_time_in_seconds = sum(detection_interval),  # sum up the total time of the intervals
              latitude = mean(mean_latitude),           # and for datasets that cross receiver deployment histories, 
              longitude = mean(mean_longitude))         # average the lat/lon of each deployment per station.

head(summary_data)
## # A tibble: 6 x 6
##   location detection_count num_unique_tags total_residence… latitude
##   <chr>              <int>           <int>            <dbl>    <dbl>
## 1 DRF-004               62               1            13685     42.2
## 2 DRL-004               61               1            13699     42.1
## 3 DRL-010               83               1            13797     42.1
## 4 DRL-011               42               1             9399     42.1
## 5 DRU-001                7               1              201     42.4
## 6 DRU-002               23               1             5549     42.4
## # … with 1 more variable: longitude <dbl>

Plot.ly

Plotly is a library for creating interactive plots, but you can also coerce it into making static plots. It has implementations in R, Python and Javascript, and it’s one of many options for creating customized, interactive/static plots of all kinds. It’s got fairly good documentation at https://plot.ly/r and we’ll go over some of the functionality here while we use it to introspect our data visually.

library(plotly, verbose = FALSE, warn.conflicts = FALSE)

# Like the standard abacus plot, for example:
abacus_plot <-
    filtered_detections %>% 
    filter(!str_detect(station, "lost")) %>% 
    ggplot(aes(x = detection_timestamp_utc, y = animal_id, color = deploy_lat)) +
    geom_point(size=5) +
    ylab("Animal ID") + xlab("Date") + labs(color = "Detection latitude") +
    theme_minimal(base_size = 20, base_family = "", base_rect_size = 60)

#Jupyter Notebook users: use this to resize your plot.
#options(repr.plot.width=20, repr.plot.height=10)
## Show our static plot
abacus_plot

ggplotly(abacus_plot)
geo <- list(
  #   scope = 'north america',
  showland = TRUE,
  landcolor = toRGB("#7BB992"),
  showocean = TRUE,
  oceancolor = toRGB("#A0AAB4"),
  showrivers = TRUE,
  rivercolor = toRGB("#A0AAB4"),
  showlakes = TRUE,
  lakecolor = toRGB("#A0AAB4"),
  showcountries = TRUE,
  resolution = 50,
  center = list(lat = ~median(latitude),
                lon = ~median(longitude)),
  lonaxis = list(range=c(~min(longitude) - 4, ~max(longitude) + 4)),
  lataxis = list(range=c(~min(latitude) - 4, ~max(latitude) + 4))
)
map <- summary_data %>%
    filter(!str_detect(location, "lost")) %>%
    plot_geo(lat = ~latitude, lon = ~longitude, color = ~detection_count, height = 900 )%>%
    add_markers(
        text = ~paste(location, ': ', detection_count,'detections', ' & ', total_residence_time_in_seconds, ' seconds of residence time'),
        hoverinfo = "text",
        size = ~c(detection_count/10)#  + total_residence_time_in_seconds/3600)
    )%>%
    layout(title = "Detections in the Great Lakes", geo = geo)


map  
## Warning: `line.width` does not currently support multiple values.
mapbox <- summary_data %>%
  filter(!str_detect(location, "lost")) %>%
  plot_mapbox(lat = ~latitude, lon = ~longitude, color = ~detection_count , height = 900) %>%
  add_markers(
    text = ~paste(location, ': ', detection_count,'detections', ' & ', total_residence_time_in_seconds, ' seconds of residence time'),
    hoverinfo = "text",
    size = ~c(detection_count/10  + total_residence_time_in_seconds/3600)
  )%>%
  layout( mapbox = list(zoom = 7,
                        center = list(lat = ~median(latitude),
                                      lon = ~median(longitude))
  ))

mapbox
## Warning: `line.width` does not currently support multiple values.